Set options

# set global knitr chunk options
# (use TRUE/FALSE rather than T/F: T and F are ordinary variables that can be
#  reassigned, so relying on them is fragile)
knitr::opts_chunk$set(echo = TRUE,     # show code in the rendered output
                      eval = TRUE,     # evaluate chunks
                      error = FALSE,   # stop rendering on errors
                      warning = FALSE, # suppress warnings in the output
                      message = FALSE, # suppress messages in the output
                      cache = TRUE     # cache chunk results between renders
                      )

# suppress scientific notation in printed numbers
options(scipen = 999)

# list of required packages
# (the SIN package was removed from the CRAN repository and is not included)
packages <- c("MASS", "dplyr", "tidyr", "purrr", "extraDistr", "ggplot2",
              "loo", "bridgesampling", "brms", "bayesplot", "tictoc", "hypr",
              "bcogsci", "papaja", "grid", "kableExtra", "gridExtra", "lme4",
              "cowplot", "pdftools", "cmdstanr", "rootSolve", "rstan", "SBC")

## Load every package listed in 'packages', installing any that are missing
package.check <- lapply(
  packages,
  FUN = function(pkg) {
    # require() returns FALSE (rather than erroring) when pkg is not installed,
    # which is exactly what this install-on-demand pattern needs
    if (!require(pkg, character.only = TRUE)) {
      install.packages(pkg, dependencies = TRUE)
      library(pkg, character.only = TRUE)
    }
  }
)
## Loading required package: MASS
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: tidyr
## Loading required package: purrr
## Loading required package: extraDistr
## 
## Attaching package: 'extraDistr'
## The following object is masked from 'package:purrr':
## 
##     rdunif
## Loading required package: ggplot2
## Loading required package: loo
## This is loo version 2.5.1
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
## Loading required package: bridgesampling
## Loading required package: brms
## Loading required package: Rcpp
## Loading 'brms' package (version 2.17.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## 
## Attaching package: 'brms'
## The following object is masked from 'package:bridgesampling':
## 
##     bf
## The following objects are masked from 'package:extraDistr':
## 
##     ddirichlet, dfrechet, pfrechet, qfrechet, rdirichlet, rfrechet
## The following object is masked from 'package:stats':
## 
##     ar
## Loading required package: bayesplot
## This is bayesplot version 1.9.0
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
## Loading required package: tictoc
## Loading required package: hypr
## Loading required package: bcogsci
## Loading required package: papaja
## Loading required package: tinylabels
## Registered S3 method overwritten by 'parameters':
##   method                         from      
##   format.parameters_distribution datawizard
## Loading required package: grid
## Loading required package: kableExtra
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## Loading required package: gridExtra
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
## Loading required package: lme4
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Attaching package: 'lme4'
## The following object is masked from 'package:brms':
## 
##     ngrps
## Loading required package: cowplot
## Loading required package: pdftools
## Using poppler version 22.02.0
## Loading required package: cmdstanr
## This is cmdstanr version 0.5.3
## - CmdStanR documentation and vignettes: mc-stan.org/cmdstanr
## - CmdStan path: /Users/danielapalleschi/.cmdstan/cmdstan-2.30.1
## - CmdStan version: 2.30.1
## 
## A newer version of CmdStan is available. See ?install_cmdstan() to install it.
## To disable this check set option or environment variable CMDSTANR_NO_VER_CHECK=TRUE.
## Loading required package: rootSolve
## Loading required package: rstan
## Loading required package: StanHeaders
## 
## rstan version 2.26.15 (Stan version 2.26.1)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)
## 
## Attaching package: 'rstan'
## The following object is masked from 'package:tidyr':
## 
##     extract
## Loading required package: SBC
# NB: if you haven't already installed bcogsci through devtools, it won't be loaded; to do so:
# devtools::install_github("bnicenboim/bcogsci")

# this is also required, taken from the textbook:
## Save compiled models:
rstan_options(auto_write = FALSE)
## Parallelize the chains using all the cores:
options(mc.cores = parallel::detectCores())
# To solve some conflicts between packages
select <- dplyr::select
extract <- rstan::extract

Or, alternatively, the exact code taken from the book (as of Feb. 12 2023):

library(MASS)
## be careful to load dplyr after MASS
library(dplyr)
library(tidyr)
library(purrr)
library(extraDistr)
library(ggplot2)
library(loo)
library(bridgesampling)
library(brms)
library(bayesplot)
library(tictoc)
library(hypr)
library(bcogsci)
library(lme4)
library(rstan)
library(cmdstanr) # This package is optional, see https://mc-stan.org/cmdstanr/
library(SBC) # This package is optional, see https://hyunjimoon.github.io/SBC/
library(SHELF)
library(rootSolve)

## Save compiled models:
rstan_options(auto_write = FALSE)
## Parallelize the chains using all the cores:
options(mc.cores = parallel::detectCores())
# To solve some conflicts between packages
select <- dplyr::select
extract <- rstan::extract

Terms and abbreviations

Week 1

  • RV: random variable

  • Bernoulli distribution: univariate binomial

  • PMF: probability mass function (binomial)

  • CMF: cumulative mass function (binomial)

  • standard normal distribution: mean = 0, sd = 1

  • f = PDF: probability density function (continuous)

  • CDF: cumulative density function (continuous)

  • k: number of successes

  • n: number of trials

  • \(\theta\): the probability of success

  • AUC: area under the curve

  • univariate: one variable

  • bivariate: 2 variables

  • multivariate: 2+ variables

  • variance = \(sd^2\)

Week 2

  • P(A|B): probability of A given B
  • P(A,B): probability of A and B
  • f = PDF: probability density function (continuous)
    • so f($theta$|y): PDF of theta given a certain value of y

Week 4

  • \(\alpha\): intercept, i.e.,the average effect

1 Week 1: Introduction

1.1 Discrete random variables

  • random variables are functions that map one set to the set of real numbers
    • associates to each outcome to a particular real number

There’s a set of events that can happen, like tossing a coin. A random variable maps each of these possible events to a real number. In tossing a coin until you get a heads, a random variable would be the number of tosses until you get a ‘success’ (heads). In principle it could be an infinite set, because you could in theory toss the coin an infinite number of times without ever getting a heads (though this is extremely improbable).

Alternatively, the random variable can be a finite set if we are interested in looking whether one toss results in a head or a tails.

PMF: probability mass function, PDF: probability density function

  • PMF = for discrete distributions
  • PDF: for continuous
  • CDF: cumulative distribution function; gives a mapping from a particular numerical value to a probability; means the probability of getting that number or something less than it.

Simulate tossing a coin 10 times with probability of heads = 0.5, with a Bernoulli random variable.

# simulate tossing a coin 10 times
extraDistr::rbern(n = 10, prob = .5)
##  [1] 1 0 1 0 0 1 0 1 1 0

What’s the probability of getting a tails or a heads? The d-family of functions:

extraDistr::dbern(0,prob=.5)
## [1] 0.5
extraDistr::dbern(1,prob=.5)
## [1] 0.5

Cumulative probability distribution function: the p-family of functions

# probability distribution function

## for whichever case we coded as 0
extraDistr::pbern(0,.5)
## [1] 0.5
## for whichever case we coded as 1
extraDistr::pbern(1,.5)
## [1] 1

1.2 Discrete random variables: the binomial

When we toss a coin only once, this is a Bernoulli random variable. If we toss the coin more than once, this is a binomial. Both Bernoulli and Binomial have a PMF associated with them.

# generate random binomial data
rbinom(n = 10, size = 1, prob = .5)
##  [1] 1 1 1 0 1 0 0 1 0 0
# compute probabilites of a particular outcome
probs <- round(dbinom(0:10, size = 10, prob = .5),3); probs
##  [1] 0.001 0.010 0.044 0.117 0.205 0.246 0.205 0.117 0.044 0.010 0.001
x <- 0:10
rbind(x,probs)
##        [,1] [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9] [,10]  [,11]
## x     0.000 1.00 2.000 3.000 4.000 5.000 6.000 7.000 8.000  9.00 10.000
## probs 0.001 0.01 0.044 0.117 0.205 0.246 0.205 0.117 0.044  0.01  0.001
# compute cumulative probabilities of all possible outcomes
pbinom(0:10, size=10, prob = .5)
##  [1] 0.0009765625 0.0107421875 0.0546875000 0.1718750000 0.3769531250
##  [6] 0.6230468750 0.8281250000 0.9453125000 0.9892578125 0.9990234375
## [11] 1.0000000000

Compute quantiles using the inverse of the CDF: what is the quantile q such that the probability of X is greater than x?

# generate distribution (0:10 outcomes or less, for 10 repetitions, with probability .5)
probs <- pbinom(0:10,size = 10, prob = .5)


# compute the inverse CDF; qbinom takes as its input a probability of an outcome and outputs the inverse
qbinom(probs, size = 10, prob=.5)
##  [1]  0  1  2  3  4  5  6  7  8  9 10

1.2.1 My summary notes

d-p-q-r family of functions

RV Function(arguments) Outcome
Bernoulli rbern(n,prob) generate random data
dbern(x,prob) PMF: probability of outcome x
pbern(q,prob) CMF: cumulative PMF of <=q
Binomial rbinom(n, size, prob) generate random data with n = of successes
dbinom(q,size,prob) PMF: probability of outcome x
pbinom(q,size,prob) CMF: cumulative PMF of <=q
qbinom(p,size,prob) inverse CMF: quantile q such that P(X <= q) >= p
normal rnorm(n,mean,sd) generate random data
dnorm(x,mean,sd) PDF: density of outcome x
pnorm(q,mean,sd) CDF: cumulative PDF of <=q
pnorm(q,mean,sd, lower.tail=F) inverse CDF: cumulative PDF of >=q
qnorm(p,mean,sd) Compute quantiles corresponding to probabilities

1.3 Continuous random variables (PDFs)

  • in discrete RV cases, we compute the probability of a particular outcome
  • in continuous RV cases, probability is defined by the area under the curve (AUC)
  • we use the cumulative distribution function associated with a normal distribution to compute this AUC; i.e., the probability of observing a value between x1 and x2

1.4 Continuous RVs: the normal distribution

  • the standard normal distribution:
    • Normal(mean = 0, sd = 1)
    • Prob(-1 < X < 1) = 68%
    • Prob(-1.96 < X < 1.96) = 95%
    • Prob(-3 < X <3) = 99.7%
  • for any other normal distribution (with varying mean and sd):
    • Prob(-1*sd < X < 1*sd ) = 68%
    • Prob(-1.96*sd < X < 1.96*sd ) = 95%
    • Prob(-3*sd < X <3*sd ) = 99.7%
  • the continuous values on the x-axis (here, +/- 1,2,3) are the quantiles

rnorm(n,mean,sd): Generate random data using

rnorm(n=5,
      mean = 0,
      sd = 1)
## [1] -1.7423762  1.5093688  0.5218357  1.3696954  0.3483563

pnorm(q,mean,sd): Compute probabilities using CDF

pnorm(q = 2,
      mean = 0,
      sd = 1)
## [1] 0.9772499

pnorm(q,mean,sd,lower.tail=F): Compute inverse probabilities using CDF

pnorm(q = 2,
      lower.tail=F,
      mean = 0,
      sd = 1) # don't look the left (equal to or less than q), but the right (equal or greater than q)
## [1] 0.02275013

qnorm(p,mean,sd)): Compute quantiles corresponding to probabilities using the inverse of the CDF

qnorm(p = 0.9772499,
      mean = 0,
      sd = 1)
## [1] 2.000001

dnorm(x,mean,sd): Compute the probability density for a quantile value

  • not the probability of a particular outcome
  • rather the density of that particular value, i.e., the y-axis value
dnorm(x = 2,
      mean = 0,
      sd = 1)
## [1] 0.05399097
# Annotated sketch of the d-p-q-r family on a standard normal curve:
# shaded area = pnorm, height of the curve = dnorm, x-axis location = qnorm,
# triangles along the axis = random draws from rnorm.
curve(dnorm(x,0,1), xlim=c(-3,3), main="Normal(0,1)",
      ylab="density")
# shade the region between z = -1 and z = 1 (its area is pnorm(1) - pnorm(-1))
from.z <- -1
to.z <- 1
S.x  <- c(from.z, seq(from.z, to.z, 0.01), to.z)
S.y  <- c(0, dnorm(seq(from.z, to.z, 0.01)), 0)
polygon(S.x,S.y, col=rgb(1, 0, 0,0.3))
text(-2,0.15,pos=4,cex=1.5,paste("pnorm(1)-pnorm(-1)"))
# arrow to the density value at z = 1: dnorm(1) is a y-axis (height) value
arrows(x1=2,y1=0.3,x0=1,y0=dnorm(1),code = 1)
text(1.7,0.32,pos=4,cex=1.5,paste("dnorm(1)"))
points(1,dnorm(1))
#points(1,0)
# arrow to the quantile on the x-axis: qnorm(0.841) = 1
arrows(x1=2,y1=0.1,x0=1,y0=0,code = 1)
text(1,0.12,pos=4,cex=1.5,paste("qnorm(0.841)"))
# NOTE(review): this reassigns the global `x`; later chunks that expect the
# earlier value of x will silently see these random draws instead
x<-rnorm(10)
points(x=x,y=rep(0,10),pch=17)
text(-3,0.05,pos=4,cex=1.5,paste("rnorm(10)"))
arrows(x1=-2.5,y1=0.03,x0=min(x),y0=0,code = 1)

1.5 Maximum Likelihood estimates

Spoiler: it’s pretty much the mean/mode/median of normally distributed data (where the three would be the same value); gives us the most likely value that the parameter \(\theta\) has given the data

  • the ‘expectation’ (discrete case)
  • if you were to do an experiment with a larger and larger sample size, you’d get closer to the expected value (e.g., value of .5 successes in repeated coin tosses)
  • we can estimate \(\theta\) (\(\theta\)-hat), but we’ll never know the true value of \(\theta\)

From my book notes from SMLP 2022:

In real experimental situations we never know the true value of \(\theta\) (probability of an outcome), but it can be estimated from the data: \(\theta\) hat = k/n, where k = number of observed successes, n = number of trials, and \(\theta\) hat = observed proportion of successes. \(\theta\) hat = maximum likelihood estimate of the true but unknown parameter \(\theta\). Basically, the mean of the binomial distribution. The variance can also be estimated by computing n\(\theta\)(1 - \(\theta\)). These estimates can be used for statistical inference.

From my class notes from SMLP 2022:

  • common misunderstanding of the maximum likelihood estimate (MLE): it doesn’t represent the true value of \(\theta\), because it’s the MLE (best guess) for the data you have
  • but the MLE will be closer to the true value of \(\theta\) as sample size increases
  • e.g., the PMF (binomial) contains three terms:
  • k: number of successes
  • n: total number of trials
  • \(\theta\): probability of success (can have values between 0 and 1)
  • the PMF is a function of these parameters, based on the data (given our data we know the values of k and n so they are fixed quantities and no longer random), so \(\theta\) can be treated as a variable
  • the likelihood function is the PMF (or PDF) as a function of the parameters, rather than a function of the data

  • the MLE from a particular sample of data doesn’t necessarily give us an accurate estimate of the true value of \(\theta\) (because it’s just a sample, of course)

    • larger sample sizes get MLEs that more accurately represent the true value of \(\theta\) (again, duh because of the point made above regarding the expectation)

1.6 Bivariate and multivariate distributions (Discrete case)

  • univariate distributions: only one variable

  • bivariate or multivariable distributions: multiple variables

  • Bernoulli distribution: univariate binomial data

  • joint probability mass function (PMF): the joint distribution of multivariate binomial data

  • joint probability density function would be for continuous data

  • each RV has its own PMF that can be computed separately

  • you can also compute the conditional distribution, i.e., the probability of x given y (x|y) or of y given x (y|x)

    • the conditional probability of x|y is going to be the joint distribution of x,y divided by the marginal distribution of a particular value of y

1.7 Bivariate and multivariate distributions (Continuous case)

  • only going to discuss bivariate distributions here cause they’re easier to conceptualize

  • we’d take into account the mean and sd of each variable, as well as the correlation between the two

  • in a bivariate case, the diagonals (from top left downward) of the VarCorr matrix will contain the variances of either RV

    • the off-diagonals (from bottom left upward) contain the covariance from the two RVs
  • covariance = the correlation of the two RVs multiplied by each of their SDs: Cov(X,Y) = \(\rho\)XY\(\sigma\)X\(\sigma\)Y

  • this allows us to describe how these 2 RVs are jointly distributed

  • in the joint PDF: AUC sums to 1 but will be a 3D cone, rather than a 2D curve as in the univariate case

  • we can also compute the marginal distributions, just as in the bivariate discrete case

  • simulating data:

# generate simulated bivariate data

## define a variance-covariance matrix with sd1 = 5, sd2 = 10, rho = .6
## (a covariance matrix must be symmetric: both off-diagonal cells hold
##  covariance = sd1 * sd2 * rho = 30; the original had `.5 * 10 * .6` in the
##  second cell, which made the matrix asymmetric and therefore invalid)
Sigma <- matrix(c(
  5^2, 5 * 10 * .6,   # variance = sd1^2 = 25,  covariance = sd1 * sd2 * rho
  5 * 10 * .6, 10^2   # covariance = sd1 * sd2 * rho, variance = sd2^2 = 100
  ),
  byrow = FALSE, ncol = 2
  )

## generate data
u <- as.data.frame(MASS::mvrnorm(n = 100, mu = c(0,0), Sigma = Sigma))
head(u, n=3)
## plot data
# plot(u)

ggplot2::ggplot(u, aes(x = V1, y = V2)) +
    labs(title = "rho = .6") + 
    geom_point()

2 Week 2: Bayesian data analysis

2.1 Bayes’ Rule

  • suppose you have 2 discrete events (A: the streets are wet, B: it is raining)

    • the probability of A given B is the joint probability of A and B divided by the marginal probability of that particular value of B (where P(B) > 0)
      • this conditional probability rule leads to Bayes’ rule
  • P(A|B) = P(A,B)/P(B) where P(B) > 0

  • P(A,B) = P(B,A)

  • P(B,A) = P(B|A)P(A) = P(A|B)P(B) = P(A,B)

  • rearranging the terms, we get Bayes’ rule:

    • P(B|A) = P(A|B)P(B) / P(A)
  • but in real world we’re usually working with multivariate (and continuous) data

    • we can re-write Bayes’ rule in terms of density function (curly f)
    • f() refers to a PDF, not the probability of a single event
    • $theta$ is now a random variable: *f($theta$)
    • *f($theta\(|y)*: PDF of \$theta\) given our observed data (y)
  • an example with a discrete RV: if n = 10 trials and k = 7 successes, and if we suppose there are three possible values of $theta$: .1,.5, and .9 and each has the probability of 1/3, the likelihood function would be:

# Average the binomial likelihood of k = 7 successes in n = 10 trials over the
# three candidate theta values, each with prior probability 1/3.
# Taking mean() over the three likelihoods is identical to sum()/3.
mean(dbinom(x = 7,                     # k successes
            size = 10,                 # n trials
            prob = c(0.1, 0.5, 0.9)))  # candidate theta values
## [1] 0.05819729

2.2 Bayes’ rule in action: Binomial-beta conjugate case

# a discrete example
dbinom(x = 46, # k successes
       size = 100, # n trials
       prob = .5 # theta
       )
## [1] 0.0579584
  • the beta distribution is defined in terms of a and b
    • B(a,b) refers to a particular beta distribution with some values a and b
    • in R, a and b are written as shape1 and shape2
dbeta(x, # 
      shape1, # parameter a
      shape2 # parameter b
)
  • beta distribution’s a and b can be interpreted as our beliefs about prior successes and failurs
    • once we choose a and b, we can plot the beta PDF
# will spit out the *density* (y-axis value) at a certain point along the curve
dbeta(x = .5,
      shape1=1,
      shape2=1
      )
## [1] 1
dbeta(x = .5,
      shape1=3,
      shape2=3
      )
## [1] 1.875
dbeta(x = .5,
      shape1=6,
      shape2=6
      )
## [1] 2.707031
plot(function(x) 
  dbeta(x,shape1=1,shape2=1), 0,1,
      main = "Beta density",
  ylab="density",xlab="theta",ylim=c(0,3))

text(.5,1.1,"a=1,b=1")

plot(function(x) 
  dbeta(x,shape1=3,shape2=3),0,1,add=TRUE)
text(.5,1.6,"a=3,b=3")


plot(function(x) 
  dbeta(x,shape1=6,shape2=6),0,1,add=TRUE)
text(.5,2.6,"a=6,b=6")

  • the higher the values of a and b, the tighter your priors

    • compare the curves for when a and b were 1, 3, and 6 in
    • a = number of successes
    • b = number of failures
    • therefore, the prior mean will be 0.5 if a and b are equal
  • uninformative prior: a and b = 1 (flat line, all values are equally likely)

  • normalising the likelihood allows us to visualize all three (prior, likelihood, posterior) in the same plot on the same scale

## observed data: k = 46 successes out of n = 100 trials
x <- 46
n <- 100

## Beta(a, b) prior on theta
a <- 210
b <- 21

## binomial likelihood as a function of theta, with k and n fixed at the data
binom_lh <- function(theta) dbinom(x, size = n, prob = theta)

## normalizing constant: scale the likelihood so it integrates to 1 over
## theta in [0, 1], putting it on the same density scale as prior/posterior
K <- 1 / integrate(f = binom_lh, lower = 0, upper = 1)$value

## likelihood rescaled to a proper density over theta
binom_scaled_lh <- function(theta) {
  K * binom_lh(theta)
}
# Plot normalized prior, posterior, and observed data
## Likelihood
p_beta <- ggplot(data = tibble(theta = c(0, 1)), aes(theta)) +
  stat_function(
    fun = dbeta,
    args = list(shape1 = a, 
                shape2 = b),
    aes(linetype = "Prior")
  ) +
  ylab("density") +
  stat_function(
    fun = dbeta,
    args = list(shape1 = x + a, 
                shape2 = n - x + b), 
    aes(linetype = "Posterior")
  ) +
  stat_function(
    fun = binom_scaled_lh,
    aes(linetype = "Scaled likelihood")
  ) +
  theme_bw() +
  theme(legend.title = element_blank())
p_beta

  • here we see that the posterior is a compromise between the prior and the likelihood
  • the more precise the prior is, the more the posterior will shift to the prior
# Plot prior, likelihood, and posterior over a grid of theta values.
# Relies on the earlier chunk's globals: x = 46, n = 100, a = 210, b = 21.
thetas <- seq(0, 1, length = 100)
op <- par(mfrow = c(3, 1))

## prior: Beta(a, b)
plot(thetas, dbeta(thetas, shape1 = a, shape2 = b), type = "l",
     main = "Prior", xlab = "theta", ylab = "density")

## likelihood of the observed data at each theta on the grid
## (dbinom is vectorized over prob, so the original for-loop is unnecessary;
##  the original also hard-coded 47 successes although the observed data have
##  x = 46 -- using x keeps the panels consistent)
probs <- dbinom(x, size = n, prob = thetas)

plot(thetas, probs, main = "Likelihood of x|theta_j", type = "l",
     xlab = "theta", ylab = "density")

## posterior: Beta(x + a, n - x + b) by beta-binomial conjugacy
## (the original reassigned x <- seq(0, 1, length = 100) here, clobbering the
##  observed count and producing vector-valued shape parameters -- a bug)
a.star <- x + a
b.star <- n - x + b

plot(thetas, dbeta(thetas, shape1 = a.star, shape2 = b.star), type = "l",
     main = "Posterior", xlab = "theta", ylab = "density")

## restore the previous plotting layout
par(op)

2.3 Bayes’ rule in action: Poisson-Gamma conjugate case

  • imagine a dataset with the number of regressions as a DV (a discrete count variable)
  • a and b are called `shape` and `rate`
# simulate data
round(rgamma(
  n=10,
  shape=3,
  rate=1),
  2)
##  [1] 3.92 2.20 5.53 0.92 0.86 3.60 0.39 4.29 5.00 4.26
# plot
x<-seq(0,10,by=0.01)
plot(x,dgamma(x,shape=3,rate=1),type="l",
     ylab="density")

  • to set our priors, we have to decide what the values of a and b will be
  • suppose we know the mean and variance from prior research is 3 and 1.5
    • in a gamma PDF, the mean is a over b and the variance is a over b-squared

\(\frac{a}{b}\) = 3 \(\frac{a}{b^{2}}\) = 1.5 = \(\frac{\frac{a}{b}}{b}\) = \(\frac{3}{b}\) = 1.5 then \(\frac{3}{1.5}\) = \(b\) = 2

2.4 Bayes’ rule in action: Poisson-Gamma conjugate case cont’d

  • we assume a Poisson likelihood for the data (number of regressive eye movements)
    • we know from expert knowledge that the mean rate of regressive eye movements is 3, with a variance of 1.5
    • recall that we know in a gamma distribution that the mean is a/b and the variance is a/(b^2). We can now solve for a and b
      • a/b = 3, a/(b^2) = 1.5
      • a/(b^2) = (a/b)/b = 3/b = 1.5, and so 3/1.5 = b

2.5 The posterior mean

  • we’re now going to look at the Poisson-gamma case

  • if we have a Gamma prior with a Poisson likelihood

  • recall: the Poisson distribution is for discrete cases and has one parameter: the rate (e.g., number of regressions)

    • we first need to define a PDF for \(\lambda\), which is the unknown rate; the \(gamma(a,b)\) distribution (for continuous data) is a good choice

Take aways - when data are sparse, the prior will dominate in determining the posterior mean - when a lot of data are available, the MLE will dominate in determining the posterior mean - give sparse data, informative priors based on expert knowledge, existing data, or meta-analysis will play an important role

3 Week 3: Computational Bayes

3.1 Computational Bayes I

Quick review:

  • Bayes’ rule when A and B are discrete events:

\[ P(A\mid B) = \frac{P(B\mid A) P(A)}{P(B)} \tag{2.1} \]

  • Bayes’ rule with distributions (\(\theta\) can be a vector of parameters):

\[ p(\theta\mid y) = \frac{p(y\mid \theta) p(\theta)}{p(y)} \tag{2.1} \]

  • \(\theta\) can be a vector of parameters in multivariate cases; this makes it difficult to calculate a posterior distribution (joint distribution) on paper

  • the posterior distribution is of central interest to us

  • we’re really interested in the posterior distribution up to proportionality (i.e., without the normalizing constant in the denominator)

  • imagine we know what the posterior distribution is

    • you can now find out the credible interval
# 95% credible interval
qgamma(
  c(.025,.975), 
  shape=20, # a
  rate=7) # b
## [1] 1.745217 4.238693
  • but with a large number of samples from this distribution, we can use these samples to draw essentially the same conclusions:
# generate posterior samples from a gamma distribution: 40000 observations
lambda_posterior <- rgamma(
  40000, # n
  shape = 20, # a
  rate = 7 # b
)

# now compute CrI using samples from the posterior distribution
round(quantile(lambda_posterior, probs = c(.025,.975)),2)
##  2.5% 97.5% 
##  1.75  4.24
  • this is obtaining samples for the posterior distributions

  • Bayesian analysis is really uncertainty quantification

  • these samples from the posterior distribution are what we can get through MCMC sampling

  • our main goal will always be to obtain this posterior distribution:

\[ p(\theta\mid{y}) \]

  • in the beta-binomial and Poisson-gamma cases, we could derive the posterior analytically
  • in more complex Bayesian models, we need to use some sampling method (MCMC sampling) to obtain posterior samples of the parameters

An example:

  • suppose we have data from a single subject, whose only task is to repeatedly press the spacebar without paying attention to any stimuli
  • the data are response times in ms per trial
  • we would like to know how long it takes to press a key for this subject
library(bcogsci)
data("df_spacebar")
head(df_spacebar,n=2)
  • good to always first look at your data, so let’s look at a density plot:
ggplot(df_spacebar, aes(rt)) +
  geom_density() +
  xlab("response times") +
  ggtitle("Button-press data")+theme_bw()

  • we see the peak is at about 180ms, and there’s a long tail

  • assume this statistical model (n: the n-th row in the data frame):

\[ rt_n \sim \mathit{Normal}(\mu, \sigma) \]

3.2 Computational Bayes II

  • assume this statistical model (n: the n-th row in the data frame):

\[ rt_n \sim \mathit{Normal}(\mu, \sigma) \]

  • this model can also be written for a linear model, where rt_{n} = some intercept (\(\mu\)) + noise (\(\varepsilon\)), where the residuals of the noise have a normal distribution (\(iid\) means ‘independent and identically distributed’)

\[ rt_n = \mu + \varepsilon \hbox{, where } \varepsilon_n \stackrel{iid}{\sim} \mathit{Normal}(0,\sigma) \]

  • so it is assumed that the residuals are normally distributed, and that there is some unknown variable \(\mu\) and some unknown parameter \(\sigma\), that represents the variability around \(\mu\), and that the noise (\(\varepsilon\)) is Gaussian (normally distributed)

  • in a Frequentist model, we’d run:

m <- lm(rt~1, df_spacebar)
coef(m) # intercept, i.e., the mean value of the data
## (Intercept) 
##    168.6399
sigma(m) # residual error
## [1] 24.9118
# these are the same as mean and sd of RT
mean(df_spacebar$rt)
## [1] 168.6399
sd(df_spacebar$rt)
## [1] 24.9118
  • the intercept and residual error are just the maximum likelihood estimates from the data, i.e., the mean and sd

  • the linear model is pretty much giving you 2 MLE, one for the intercept and for the residual error

  • Frequentist: \(\mu\) and \(\sigma\) are fixed, unknown point values in frequentist models

  • in the Bayesian world, \(\mu\) and \(\sigma\) are random variables and need prior distributions specified for them

    • i.e., we don’t just have the MLE
    • we define priors on the \(\mu\) and \(\sigma\) priors; these are no longer unknown points out there in nature, there is some prior knowledge being brought in
  • let’s start with (unrealistic) flat/uniform priors:

\[ \mu \sim Uniform(0,60000)\\ \sigma \sim Uniform(0,2000) \]

  • uniform priors mean that all the values between a and b are equally likely (hence the ‘flat’ label; it’s not a curve but a flat line)

  • you can choose whatever prior makes sense to you

  • the function brm has the same formula syntax as lm

# Fit an intercept-only Bayesian model of response times with brms:
# rt_n ~ Normal(mu, sigma), mu ~ Uniform(0, 60000), sigma ~ Uniform(0, 2000)
# (flat priors, used here for illustration only)
fit_press <- brm(
  # model formula: intercept only, i.e., estimate the mean rt
  rt ~ 1,
  data = df_spacebar,
  # the likelihood assumed for the data:
  family = gaussian(),  # rt_n ~ Normal(mu, sigma)
  # prior specifications, one per parameter class:
  prior = c(
    prior(
      uniform(0, 60000), # uniform/flat prior on the intercept (mu)
      class = Intercept, # mean of the normal likelihood
      lb = 0,  # lb/ub truncate the sampling to the prior's support
      ub = 60000
    ),
    prior(uniform(0, 2000), # uniform/flat prior on sigma
          class = sigma, # sd of the normal likelihood
          lb = 0,
          ub = 2000)
  ),
  # MCMC sampling specifications (all of these are the brms defaults)
  # backend = "cmdstanr", # optional: use cmdstanr instead of rstan
  chains = 4, # number of independent chains
  iter = 2000, # iterations per chain, including warmup
  warmup = 1000 # initial iterations discarded from each chain
  )
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
  • chains: the number of independent runs for sampling (by default 4)

  • iter: the number of iterations that the sampler makes to sample from the posterior distribution of each parameter (by default 2000)

  • warmup: the number of iterations from the start of sampling that are eventually discarded (by default half of ‘iter’) (in WinBUGS/JAGS this is called burn-in)

  • relevant reading: sampling and convergence in a nutshell in Ch. 3

plot(fit_press) 

  • we get a visual graphic of how the sample was searching in the posterior distribution

  • the caterpillar plot shows us the chains sitting on top of each other; a fat hairy caterpillar means the chains are all consistently sampling from the same distribution, this is good

  • if we don’t see a fat hairy caterpillar, this indicates a convergence failure (if the chains are not really aligning)

  • also try using shinystan:

# interactive model diagnostics in the browser
library(shinystan)
launch_shinystan(fit_press)
# Peek at the first three posterior draws (one row per draw, one
# column per parameter):
head(as_draws_df(fit_press), 3)
# Posterior mean of the intercept:
mean(as_draws_df(fit_press)$b_Intercept)
## [1] 168.6372
# 95% credible interval for the mean (intercept), in milliseconds:
quantile(as_draws_df(fit_press)$b_Intercept, c(.025, .975))
##     2.5%    97.5% 
## 166.0262 171.1923
# posterior mean of sigma (the residual standard deviation)
as_draws_df(fit_press)$sigma %>% 
  mean()
## [1] 24.99241
# compute 95% CrI for sigma
as_draws_df(fit_press)$sigma %>% 
  quantile(c(.025,.975))
##     2.5%    97.5% 
## 23.22645 27.00463

3.3 Computational Bayes III

  • we’ll now talk about
    • prior predictive distributions
    • posterior predictive distributions

Prior predictive distributions

The model specification, again:

\[ \mu \sim Uniform(0,60000)\\ \sigma \sim Uniform(0,2000)\\ rt_{n} \sim Normal(\mu, \sigma) \]

  • we can generate the prior predictive distribution given the model above (\(\theta = \langle \mu, \sigma \rangle\)) by integrating out the theta parameters

  • to do this, do the following many times:

  1. Take one sample from each of the priors
# one draw from each prior: Uniform(0, 60000) for mu, Uniform(0, 2000) for sigma
mu <- runif(1, min=0, max=60000)
sigma <- runif(1, 0, 2000)
  1. Plug those samples into the probability density/mass function used as the likelihood in the model to generate a data set \(y_{pred1}, ..., y_{predn}\)
# plug the prior draws into the likelihood to simulate one small data set
y_pred_1 <- rnorm(n=5, mu, sigma)
y_pred_1
## [1] 39843.65 39766.19 39666.18 39829.95 39686.40
  • each sample is an imaginary or potential data set

  • in the textbook, there’s code for generating prior predictive data using R

  • what are our options regarding the priors?

  • in the book, there are some classifications of priors (\(Normal+\) indicates normal distribution truncated at 0, i.e., no negative values):

    • flat, uniformative priors: e.g., \(\mu \sim Uniform(-10^{20}, 10^{20})\)
      • very wide range for mean, and very large sd
      • but for reaction/reading times, this isn’t reasonable
    • regularising priors: e.g., \(\mu \sim Normal_+(0,1000)\)
      • here we use 0-truncated normal distribution (positive values only)
      • but the only information we’ve given is really that the mean must be positive
    • principled priors: e.g., \(\mu \sim Normal(250,100)\)
      • principled because it centers the mean at 250, which is reasonable, but is quite liberal in the sd (=100)
    • informative priors: e.g., \(\mu \sim Normal_{+}(200,20)\)
      • if doing hypothesis testing with Bayes’ factor, using informative priors is extremely important
      • this is a much tighter prior; these are generally used in real research when possible (given enough prior knowledge)

3.4 Computational Bayes IV

  • now we’ll see what happens when we specify different priors in our model
  • there’s no standard terminology for types of priors, but we use:
    • flat/uninformative
    • regularising
    • principled
    • informative
  • we’re going to see how using these different types of prior specifications alter the posterior distribution
    • we’ll run a sensitivity analysis to see how affected by the prior our posterior is
    • if you have lots of data, the posterior should be affected mainly by the likelihood estimate
  • let’s refit our model with flat, uninformative priors:

\[ \begin{align} \mu &\sim Uniform(-10^6,10^6)\\ \sigma &\sim Uniform(0,10^6)\\ rt_{n} &\sim Normal(\mu, \sigma) \end{align} \]

# new model with flat/uninformative priors
fit_press_unif <- brm(
  # model specification: intercept-only model of response times
  rt ~ 1,
  data = df_spacebar,
  # the likelihood assumed:
  family = gaussian(),  # likelihood has normal distr.
  #   prior specifications:
  prior = c(
    prior(
      uniform(-10^6, 10^6), # uniform/flat prior
      class = Intercept, # mean
          # lb/ub must be set explicitly for uniform priors so the
          # sampler knows the support of the parameter:
          lb = -10^6,
          ub = 10^6
    ),
    prior(uniform(0, 10^6), # uniform/flat prior
          class = sigma, # sd
          lb = 0,
          ub = 10^6
          )
  ),
  # sampling specifications
  # Technical stuff
  # backend = "cmdstanr",
  chains = 4, # this is the default
  iter = 2000, # this is the default
  warmup = 1000 # this is the default
  )
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
# earlier model (default priors): 95% CrI for the intercept
as_draws_df(fit_press)$b_Intercept %>%
  quantile(c(0.025, 0.975))
##     2.5%    97.5% 
## 166.0262 171.1923
# new flat/uninformative model: 95% CrI for the intercept
as_draws_df(fit_press_unif)$b_Intercept %>%
  quantile(c(0.025, 0.975))
##     2.5%    97.5% 
## 166.0792 171.2173
  • and a very informative prior:

\[ \mu \sim Normal(400,10)\\ \sigma \sim Normal_{+}(100,10)\\ rt_{n} \sim Normal(\mu, \sigma) \]

# new model with (very) informative priors
fit_press_inf <- brm(
  # model specification
  rt ~ 1,
  data = df_spacebar,
  # the likelihood assumed:
  family = gaussian(),  # likelihood has normal distr.
  #   prior specifications:
  prior = c(
    prior(
      normal(400, 10), # informative: tight prior centered at 400 ms
      class = Intercept),
    # brms knows that SDs need to be bounded
    # to exclude values below zero:
    prior(normal(100,10), # informative
          class = sigma # sd
          )
  ),
  # sampling specifications
  # Technical stuff
  # backend = "cmdstanr",
  chains = 4, # this is the default
  iter = 2000, # this is the default
  warmup = 1000 # this is the default
  )
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
# new informative model: 95% CrI for the intercept
as_draws_df(fit_press_inf)$b_Intercept %>%
  quantile(c(0.025, 0.975))
##     2.5%    97.5% 
## 170.1966 175.7443
  • these posteriors are shifted a bit to the right (bigger) because the prior was very tight, but not by much

  • and with a principled prior:

\[ \mu \sim Normal(200,100)\\ \sigma \sim Normal_{+}(50,50)\\ rt_{n} \sim Normal(\mu, \sigma) \]

# model with principled priors: reasonable center, liberal spread
fit_press_prin <- brm(rt ~ 1,
  data = df_spacebar,
  family = gaussian(),
  prior = c(
    prior(normal(200, 100), class = Intercept),
    # brms knows that SDs need to be bounded
    # to exclude values below zero:
    prior(normal(50, 50), class = sigma)
  ),
  # sampling specifications
  # Technical stuff
  # backend = "cmdstanr",
  chains = 4, # this is the default
  iter = 2000, # this is the default
  warmup = 1000 # this is the default
)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
# new principled model: 95% CrI for the intercept
as_draws_df(fit_press_prin)$b_Intercept %>%
  quantile(c(0.025, 0.975))
##     2.5%    97.5% 
## 166.1515 171.2139
  • between the informative and principled priors, we see the more uncertain (principled) prior has posterior more similar to the posteriors in the uninformative model
    • this means that with a principled prior the model is still relying mostly on the likelihood distribution
    • this is what we want
  • this was our sensitivity analysis
    • it showed that the posterior is not overly affected by the choice of prior (good!)
    • the posterior is a compromise between the prior and the likelihood
    • informative prior with sparse data \(\rightarrow\) the prior will dominate in determining the posterior
    • a lot of data \(\rightarrow\) the likelihood will dominate in determining the posterior
  • it is good practice to carry out a sensitivity analysis (with more experience you’ll have a better handle of when it’s absolutely necessary)

3.5 Computational Bayes V

  • it’s best to use the log normal with right-skewed data

Using the log-normal likelihood

  • if \(y\) is log-normally distributed, then log(\(y\)) is normally distributed
    • the log-normal distribution is also defined using the parameters location (\(\mu\)) and scale (\(\sigma\))
  • \(\mu\) and \(\sigma\) are now on the log millisecond scale

\[ \begin{align} log(y) &\sim Normal(\mu,\sigma)\\ y &\sim LogNormal(\mu,\sigma) \end{align} \]

  • generating simulated data from a log normal
# location and scale on the log-millisecond scale
mu <- 6
sigma <- 0.5
N <- 500000

# generate N random samples from log-normal distribution
sl <- rlnorm(N, mu, sigma)
# NOTE(review): `..density..` is deprecated in ggplot2 >= 3.4;
# `after_stat(density)` is the modern equivalent — confirm the ggplot2 version
ggplot(tibble(samples = sl), aes(samples)) +
  geom_histogram(aes(y = ..density..), binwidth = 50) +
  ggtitle("Log-normal distribution\n") +
  coord_cartesian(xlim = c(0, 2000))

  • we need to change our likelihood functions and the scale of our priors!

\[ rt_n \sim LogNormal(\mu,\sigma) \]

  • uniform priors:

\[ \begin{align} \mu &\sim Uniform(0,11)\\ \sigma &\sim Uniform(0,1) \end{align} \]

  • generate prior predictive distributions:

normal_predictive_distribution <-
  function(mu_samples, sigma_samples, N_obs) {
    # Simulate one prior-predictive data set of N_obs observations for
    # each pair of prior draws (mu_samples[i], sigma_samples[i]).
    #
    # mu_samples:    numeric vector of draws for the location parameter
    # sigma_samples: numeric vector of draws for the scale parameter;
    #                must have the same length as mu_samples
    # N_obs:         number of observations per simulated data set
    #
    # Returns a tibble with columns trialn (1..N_obs), rt_pred (the
    # simulated observations), and iter (index of the prior draw).
    stopifnot(length(mu_samples) == length(sigma_samples))
    # Collect one tibble per draw and bind them all once at the end:
    # calling bind_rows() inside the loop copies the accumulated rows on
    # every iteration, which is quadratic in the number of draws.
    df_pred <- vector("list", length(mu_samples) + 1)
    # Empty tibble first, so the column order and (double) column types
    # of the result match the original header-only data frame:
    df_pred[[1]] <- tibble(
      trialn = numeric(0),
      rt_pred = numeric(0),
      iter = numeric(0)
    )
    for (i in seq_along(mu_samples)) {
      df_pred[[i + 1]] <- tibble(
        trialn = seq_len(N_obs), # 1, 2,... N_obs
        rt_pred = rnorm(N_obs, mu_samples[i], sigma_samples[i]),
        iter = i
      )
    }
    bind_rows(df_pred)
  }
# number of prior draws and observations per simulated data set
N_samples <- 1000
N_obs <- nrow(df_spacebar)
# draws from the (very wide) uniform priors, on the log scale
mu_samples <- runif(N_samples, 0, 11)
sigma_samples <- runif(N_samples, 0, 1)
# simulate normal data on the log scale, then exponentiate:
# exp(Normal(mu, sigma)) is LogNormal(mu, sigma)
prior_pred_ln <- normal_predictive_distribution(
  mu_samples = mu_samples,
  sigma_samples = sigma_samples,
  N_obs = N_obs
) %>%
  mutate(rt_pred = exp(rt_pred))
  • now let’s take a look at some distributions of the simulated data sets
# per-iteration summary statistics (min, max, mean, median) of the
# simulated data sets, plotted as histograms on a log-scaled x axis
prior_pred_ln %>%
  group_by(iter) %>%
  summarize(
    min_rt = min(rt_pred),
    max_rt = max(rt_pred),
    mean_rt = mean(rt_pred),
    median_rt = median(rt_pred)
  ) %>%
  # one row per (iteration, statistic) pair for faceting:
  pivot_longer(cols = ends_with("rt"), names_to = "stat", values_to = "rt") %>%
  mutate(stat = factor(stat, levels = c("mean_rt", "median_rt", "min_rt", "max_rt"))) %>%
  ggplot(aes(rt)) +
  scale_x_continuous("Response times in ms",
    trans = "log", breaks = c(0.001, 1, 10, 100, 1000, 10000, 100000)
  ) +
  # NOTE(review): `..density..` is deprecated in ggplot2 >= 3.4;
  # `after_stat(density)` is the modern equivalent
  geom_histogram(aes(y = ..density..)) +
  ylab("density") +
  facet_wrap(~stat, ncol = 1)+theme_bw()

  • these are very broad ranges, because our prior specifications were so broad

3.6 Computational Bayes VI

  • we saw a model with ridiculous priors
    • we know they’re ridiculous because of our prior knowledge about reaction times

    • more informative priors:

\[ \begin{align} \mu &\sim Normal(6,1.5)\\ \sigma &\sim Normal_+(0,1) \end{align} \]

  • and update our model
    • brms knows the sd is truncated at 0, so we don’t have to specify lb and ub here; if you were writing directly in Stan code you’d have to specify it yourself

# dummy outcome: rt is irrelevant here because we sample from the priors
# only, but brms still requires a data argument
df_spacebar_ref <- df_spacebar %>%
  mutate(rt = rep(1, n()))
fit_prior_press_ln <- brm(rt ~ 1,
  data = df_spacebar_ref,
  family = lognormal(), # log instead of gaussian
  prior = c(
    prior(normal(6, 1.5), class = Intercept), # on the log scale
    prior(normal(0, 1), class = sigma)
  ),
  sample_prior = "only", # produce prior samples
  control = list(adapt_delta = .9) # this was added for convergence reasons
)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
  • to look at distribution of means of repeated samples based on priors
# prior predictive distribution of the mean response time;
# prefix = "ppd" tells pp_check to use the (prior) predictive draws only
pp_check(fit_prior_press_ln, type = "stat", stat = "mean",
         prefix = "ppd") +
  coord_cartesian(xlim = c(0.001, 300000)) +
  scale_x_continuous("Response times [ms]",
    trans = "log",
    breaks = c(0.001, 1, 100, 1000, 10000, 100000),
    labels = c(
      "0.001", "1", "100", "1000", "10000",
      "100000"
    )
  ) +
  ggtitle("Prior predictive distribution of means")

  • we see the maximum values are a bit too large, but these are a huge improvement over those that we saw for the uniform priors in the last video
# Prior predictive distributions of three summary statistics (mean, min,
# max), each on a log-scaled x axis so the heavy right tail is visible.
p1 <- pp_check(fit_prior_press_ln, type = "stat", stat = "mean", prefix = "ppd") +
  coord_cartesian(xlim = c(0.001, 300000)) +
  scale_x_continuous("Response times [ms]",
    trans = "log",
    breaks = c(0.001, 1, 100, 1000, 10000, 100000),
    labels = c(
      "0.001", "1", "100", "1000", "10000",
      "100000"
    )
  ) +
  ggtitle("Prior predictive distribution of means")
p2 <- pp_check(fit_prior_press_ln, type = "stat", stat = "min", prefix = "ppd") +
  coord_cartesian(xlim = c(0.001, 300000)) +
  scale_x_continuous("Response times [ms]",
    trans = "log",
    breaks = c(0.001, 1, 100, 1000, 10000, 100000),
    labels = c(
      "0.001", "1", "100", "1000", "10000",
      "100000"
    )
  ) +
  ggtitle("Prior predictive distribution of minimum values")
p3 <- pp_check(fit_prior_press_ln, type = "stat", stat = "max", prefix = "ppd") +
  coord_cartesian(xlim = c(0.001, 300000)) +
  scale_x_continuous("Response times [ms]",
    trans = "log",
    breaks = c(0.001, 1, 100, 1000, 10000, 100000),
    # fix: label for the break at 100 previously read "10", mislabeling
    # the axis of the maximum-values panel
    labels = c(
      "0.001", "1", "100", "1000", "10000",
      "100000"
    )
  ) +
  ggtitle("Prior predictive distribution of maximum values")
# stack the three panels vertically
cowplot::plot_grid(p1, p2, p3, nrow = 3, ncol = 1)

  • now fit the model to the data with a log normal likelihood
# fit the log-normal model to the real data (priors are on the log scale)
fit_press_ln <- brm(rt ~ 1,
  data = df_spacebar,
  family = lognormal(),
  prior = c(
    prior(normal(6, 1.5), class = Intercept),
    prior(normal(0, 1), class = sigma)
  )
)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
fit_press_ln
##  Family: lognormal 
##   Links: mu = identity; sigma = identity 
## Formula: rt ~ 1 
##    Data: df_spacebar (Number of observations: 361) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Population-Level Effects: 
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     5.12      0.01     5.10     5.13 1.00     3730     2727
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.13      0.01     0.13     0.14 1.00     3162     2523
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

3.7 Computational Bayes

  • now we’ll evaluate the log-normal model with these relatively informative priors

  • back-transforming to ms:

# back-transform the intercept (log scale) to milliseconds
estimate_ms <- exp(as_draws_df(fit_press_ln)$b_Intercept)

# posterior mean, sd, and 95% CrI of the back-transformed estimate,
# rounded to one decimal place
round(c(mean = mean(estimate_ms),
  sd = sd(estimate_ms),
  quantile(estimate_ms,
           probs = c(.025,.975))),
  1)
##  mean    sd  2.5% 97.5% 
## 167.1   1.2 164.7 169.5
  • these credible intervals are conditioned on the data we happened to get, they don’t reflect any true mean necessarily

  • we can also do a posterior predictive check:

pp_check(fit_press_ln, ndraws = 100)

  • question: are the posterior predicted data now more similar to the observed data, compared to the case where we had a Normal likelihood?
# refit with a normal likelihood and flat priors, for comparison
fit_press_norm <- brm(rt ~ 1,
  data = df_spacebar,
  family = gaussian(),
  prior = c(
    prior(uniform(0, 60000), class = Intercept, lb = 0,
          ub = 60000),
    prior(uniform(0, 2000), class = sigma, lb = 0,
          ub = 2000)
  ),
  chains = 4,
  iter = 2000,
  warmup = 1000
)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
# 2x2 grid comparing posterior predictive min/max statistics for the
# normal vs. log-normal models, with a shared legend in the bottom row
ggpubr::ggarrange(
  pp_check(fit_press_norm, 
         type = "stat", stat = "min") +
  ggtitle("Normal model (min)")+
  theme(legend.position = "none"),
pp_check(fit_press_norm, 
         type = "stat", stat = "max") +
  ggtitle("Normal model (max)")+
  theme(legend.position = "none"),
pp_check(fit_press_ln, 
         type = "stat", stat = "min") +
  ggtitle("Log model (min)")+
  theme(legend.position = "none"),
pp_check(fit_press_ln, 
         type = "stat", stat = "max") +
  ggtitle("Log model (max)") +
  theme(legend.position = "none"),
# shared legend extracted from one of the plots:
cowplot::get_legend(pp_check(fit_press_norm) + theme(legend.position = "bottom")),
nrow = 3, ncol = 2,
heights = c(.45,.45,.1),
labels = c("A","B","C","D")
)

  • here we see the normal model is producing values too small in the minimum values (more values lower than the actual minimum value), while the log model is doing a better job (A vs. C)
  • for both normal and log models, the maximum reaction time is underestimated (B vs. D)

3.8 Summary

  • we saw 2 simple examples of a linear model, with two different likelihoods (normal and log-normal)

  • one key skill we learned was to examine and interpret the prior predictive distribution

  • another key skill: interpreting the posterior predictive distribution

  • these two distributions tell us how well the model represents the reality, both before and after observing the particular data we have

  • Next:

    • adding a predictor: \(y = \alpha + \beta \times x + \epsilon\)
    • logistic regression
    • first steps in modeling repeated measures (dependent) data with hierarchical models: \(y = \alpha + u_0 + \beta \times x + \epsilon\)

4 Week 4: Regression models

4.1 Example: Multiple object tracking

  • participant has to track between 0:5 objects on the screen, when there are several distractor objects on the screen as well

  • examining attentional load, and its effect on pupil size

  • our model:

\[ p\_size_n \sim Normal(\alpha + c\_load_n \cdot \beta, \sigma) \]

  • \(\beta\) = slope (determined by cognitive load)
  • \(\alpha\) = intercept, which will be the average effect of pupil size
  • every data point is assumed to be independent (in frequentist terms: iid)
# Pilot data: distribution of pupil sizes (informs the prior for alpha).
data("df_pupil_pilot")
summary(df_pupil_pilot$p_size)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   851.5   856.0   862.0   860.8   866.5   868.0
  • this suggests we can use the following regularizing prior for \(\alpha\)

\[ \alpha \sim Normal(1000,500) \]

  • we are expressing with these priors that our plausible values for the Intercept will be (95% CrI)
# 95% central interval implied by the Normal(1000, 500) prior on alpha.
round(qnorm(c(0.025, 0.975), mean = 1000, sd = 500), digits = 2)
## [1]   20.02 1979.98
  • for \(\sigma\), we use an uninformative prior

\[ \sigma \sim Normal_+(0,1000) \]

# 95% central interval of the truncated-normal prior Normal_+(0, 1000);
# a = 0 truncates the distribution at zero (sigma must be positive).
round(
  extraDistr::qtnorm(c(0.025, 0.975), mean = 0, sd = 1000, a = 0),
  digits = 1
)
## [1]   31.3 2241.4
  • \(\beta\) is ‘most important’ (effect of attentional load on pupil size)

\[ \beta \sim Normal(0,100) \]

  • not truncated, meaning we are agnostic in terms of whether the effect will be positive or negative
# 95% central interval implied by the Normal(0, 100) prior on beta.
round(qnorm(c(0.025, 0.975), mean = 0, sd = 100), digits = 1)
## [1] -196  196
  • these are what we’re saying we think the plausible values of \(\beta\) will be

  • let’s now work with the ‘real’ data, and centre the predictor

    • do this by subtracting the mean from each value
    • negative centered values represent values lower than the mean
  • we see this data is repeated measure, because we have multiple data points from each participant for each load (2 times load = 0, for example)

# Load the full pupil data and centre the load predictor so that
# c_load = 0 corresponds to the average cognitive load.
data("df_pupil")
df_pupil <- mutate(df_pupil, c_load = load - mean(load))
head(df_pupil, 7)
  • fit the model
# Fit p_size ~ c_load with a Gaussian likelihood.
# Only sigma is truncated at 0 in our priors, and brms truncates sigma
# at 0 by default, so no lb/ub needs to be set explicitly.
priors_pupil <- c(
  prior(normal(1000, 500), class = Intercept),
  prior(normal(0, 1000), class = sigma),
  prior(normal(0, 100), class = b, coef = c_load)
)
fit_pupil <- brm(
  p_size ~ c_load,
  data = df_pupil,
  family = gaussian(),
  prior = priors_pupil
)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1

4.2 Example: Multiple Object Tracking II

  • now let’s see what we got out of this model
  • use the plot() function to check the convergence
    • we also get the slope for c_load (written as b_c_load)
# Trace plots + marginal posteriors: check that the chains overlap (mix)
# and read off b_Intercept, b_c_load, and sigma.
plot(fit_pupil)

  • we see:

    • intercept is around 700 (the average pupil size, because we centered the predictor)
    • c_load: range of variability in pupil size change with an increase of 1 unit of load
      • effect size is mainly between 10-60
  • we see sigma is in the positive range

  • we also see the chains are on top of each other (and so are sampling from the same distribution)

# Compact model summary via short_summary(), a course-provided helper
# (not from a CRAN package); will be shared with us.
short_summary(fit_pupil)

4.2.1 Posterior predictive check

  • we can now get more future simulated data sets
  • having fit the model, we can generate new data repeatedly using our posterior distributions of our parameters
# Posterior predictive density check, one panel per cognitive load level
# (0-4). Each panel overlays 100 simulated data sets on the observed
# p_size values for that load, and is stored as dens_<load> for the
# ggarrange call below.
for (load_level in 0:4) {
  df_sub_pupil <- filter(df_pupil, load == load_level)
  dens_plot <- pp_check(fit_pupil,
    type = "dens_overlay",
    ndraws = 100,
    newdata = df_sub_pupil
  ) +
    geom_point(data = df_sub_pupil, aes(x = p_size, y = 0.0001)) +
    ggtitle(paste("load: ", load_level)) +
    coord_cartesian(xlim = c(400, 1000))
  assign(paste0("dens_", load_level), dens_plot)
}
# Arrange the five density panels plus a single shared legend.
ggpubr::ggarrange(
  plotlist = list(
    dens_0 + theme(legend.position = "none"),
    dens_1 + theme(legend.position = "none"),
    dens_2 + theme(legend.position = "none"),
    dens_3 + theme(legend.position = "none"),
    dens_4 + theme(legend.position = "none"),
    cowplot::get_legend(dens_4)
  ),
  nrow = 2, ncol = 3
)

  # Posterior predictive check of the *mean* statistic per load level,
  # stored as p_<load> for the ggarrange call below.
  for (load_level in 0:4) {
    df_sub_pupil <- filter(df_pupil, load == load_level)
    stat_plot <- pp_check(fit_pupil,
      type = "stat",
      ndraws = 1000,
      newdata = df_sub_pupil,
      stat = "mean"
    ) +
      geom_point(data = df_sub_pupil, aes(x = p_size, y = 0.1)) +
      ggtitle(paste("load: ", load_level)) +
      coord_cartesian(xlim = c(400, 1000))
    assign(paste0("p_", load_level), stat_plot)
  }
# Arrange the five mean-statistic panels plus a single shared legend.
ggpubr::ggarrange(
  plotlist = list(
    p_0 + theme(legend.position = "none"),
    p_1 + theme(legend.position = "none"),
    p_2 + theme(legend.position = "none"),
    p_3 + theme(legend.position = "none"),
    p_4 + theme(legend.position = "none"),
    cowplot::get_legend(p_4)
  ),
  nrow = 2, ncol = 3
)

4.3 Log-normal likelihood

  • let’s now use the spacebar button press data again
    • use trial as a predictor to see if there’s a practice effect (speed-up) or fatigue effect (slowdown)
    • let’s first centre trial so that 0 means the average trial ID
# Centre trial so that c_trial = 0 is the average (middle) trial.
df_spacebar <- mutate(df_spacebar, c_trial = trial - mean(trial))
  • we’ll assume a log-normal likelihood (it’s 0 truncated, gonna have a positive skew)

\[ rt_n \sim LogNormal(\alpha + c\_trial_n \cdot \beta, \sigma) \]

  • \(N\): total number of (independent) data points

  • \(n = 1,...,N\), and \(rt\): the dependent variable (RTs in milliseconds)

  • because we’re assuming a log normal likelihood, we now have to define priors on the \(\alpha\), \(\beta\), and \(\sigma\) parameters in the log scale

\[ \begin{align} \alpha &\sim Normal(6,1.5)\\ \sigma &\sim Normal_+(0,1) \end{align} \]

  • the new parameter \(\beta\) (effect of trial) needs a prior specification as well
    • let’s try a seemingly uniformative prior

\[ \beta \sim Normal(0,1) \]

  • let’s look at the prior predictive checks using the brm function, but with:
    • family = lognormal() to set log normal distribution
    • sample_prior = "only" to not compute posteriors
  • this step helps us decide if our priors are reasonable or not
# Prior predictive check: ignore the dependent variable by replacing it
# with a dummy column of ones, then sample from the priors only
# (sample_prior = "only" means no likelihood update).
df_spacebar_ref <- df_spacebar %>%
  mutate(t = rep(1, n()))

priors_press_trial <- c(
  prior(normal(6, 1.5), class = Intercept),
  prior(normal(0, 1), class = sigma),
  prior(normal(0, 1), class = b, coef = c_trial)
)
fit_prior_press_trial <- brm(t ~ 1 + c_trial,
  data = df_spacebar_ref,        # dummy data: only the priors matter here
  family = lognormal(),          # RTs are positive and right-skewed
  prior = priors_press_trial,
  sample_prior = "only",         # prior predictive distribution
  control = list(adapt_delta = .9)
)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
  • you can also look at some statistic from the prior predictive distributions that tells us something about the range of variability of that statistic
    • for example, let’s look at the median difference in RTs between sequential trials
    • we can then look at the prior predictive distribution of the median difference in RTs from one trial to the next based on our priors
# Median trial-to-trial difference of a vector of RTs.
# x - dplyr::lag(x) equals c(NA, diff(x)), so base diff() gives the same
# result without the dplyr dependency; na.rm drops the leading NA anyway.
median_diff <- function(x) {
  median(diff(x), na.rm = TRUE)
}
# Prior predictive distribution of the median trial-to-trial RT difference.
prior_stat_plot <- pp_check(fit_prior_press_trial,
  type = "stat",
  stat = "median_diff",
  prefix = "ppd",    # show only the prior predictive distribution
  binwidth = 500     # each bin is 500 ms wide
)
# Truncate the y axis so the bulk of the distribution stays readable.
prior_stat_plot + coord_cartesian(ylim = c(0, 50))

  • we see this is a pretty liberal prior, predicting a huge amount of variability up to the 10’s of thousands (weird for a single button press!)
  • so let’s try a more informative prior for \(\beta\) (effect of the predictor)

\[ \beta \sim Normal(0,0.01) \] - try it again with these more informative priors

# Same prior predictive model, but with the tighter Normal(0, .01)
# prior on the trial slope; still sampling from the priors only.
priors_press_trial_tight <- c(
  prior(normal(6, 1.5), class = Intercept),
  prior(normal(0, 1), class = sigma),
  prior(normal(0, .01), class = b, coef = c_trial)
)
fit_prior_press_trial <- brm(t ~ 1 + c_trial,
  data = df_spacebar_ref,        # dummy data: only the priors matter here
  family = lognormal(),
  prior = priors_press_trial_tight,
  sample_prior = "only",         # prior predictive distribution
  control = list(adapt_delta = .9)
)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
# Median trial-to-trial difference of a vector of RTs (redefined here
# after refitting). x - dplyr::lag(x) equals c(NA, diff(x)), so base
# diff() is equivalent and drops the dplyr dependency; na.rm discards
# the leading NA anyway.
median_diff <- function(x) {
  median(diff(x), na.rm = TRUE)
}
# Prior predictive distribution of the median trial-to-trial difference
# under the tighter slope prior.
tight_prior_plot <- pp_check(fit_prior_press_trial,
  type = "stat",
  stat = "median_diff",
  prefix = "ppd",    # show only the prior predictive distribution
  binwidth = 500     # each bin is 500 ms wide
)
# Truncate the y axis to keep the plot scale readable.
tight_prior_plot + coord_cartesian(ylim = c(0, 50))

  • now we see it’s much more reasonable, the distribution is much tighter around 0
    • we know the posterior for the slope will be informed by the data (because we have a lot of data for this experiment), so the prior doesn’t need to be absolutely perfect
    • you can always check with a sensitivity analysis that your prior is not unduly overinfluencing the posterior

4.4 Button-pressing time and the effect of trial

  • having specified our priors, we’ll now fit the model with the data
  • if using the log-normal scale, be sure to specify family = lognormal()!
# Fit the model to the *observed* RTs (not prior-only this time),
# keeping the more informative Normal(0, .01) prior on the trial slope.
priors_press_trial_fit <- c(
  prior(normal(6, 1.5), class = Intercept),
  prior(normal(0, 1), class = sigma),
  prior(normal(0, .01), class = b, coef = c_trial)
)
fit_press_trial <- brm(rt ~ 1 + c_trial,
  data = df_spacebar,            # the real button-press data
  family = lognormal(),          # RTs are positive and right-skewed
  prior = priors_press_trial_fit,
  control = list(adapt_delta = .9)
)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
  • let’s plot the model fit
# Convergence diagnostics, then a posterior predictive density overlay
# of 100 simulated data sets against the observed RTs.
plot(fit_press_trial)

pp_check(fit_press_trial, type = "dens_overlay", ndraws = 100)

  • we see the slope parameter (b_c_trial) has very small values, which is difficult to interpret
    • would be good to back-transform
    • we can now extract the posterior distributions as vectors of samples for each of the relevant parameters (\(\alpha, \beta, \sigma\)), which are stored inside the model
  • to extract these distributions, we can use the function as_draws_df(data)$parameter
# Pull posterior samples for the intercept and the trial slope.
# as_draws_df() rebuilds the full draws data frame on every call, so
# call it once and extract both columns from the result.
draws_press_trial <- as_draws_df(fit_press_trial)
alpha_samples <- draws_press_trial$b_Intercept
beta_samples <- draws_press_trial$b_c_trial
  • to find the effect estimate (in ms) from the middle (centred trials = 0) of the experiment to the preceding trial (centred trials = -1):

\[ exp(\alpha) - exp(\alpha - \beta) \]

  • where:
    • \(exp(\alpha)\) is the vector of sample intercepts, i.e., the mean RT at the centered trial (trial = 0) in the middle of the experiment, and
    • \(exp(\beta)\) is the slope, i.e., the change in the average RT with every unit increase (by 1 unit, i.e., difference between 2 trials)
  • and if we exponentiate both side, we get everything back on the ms scale
# Back-transform to ms: predicted RT at c_trial = 0 (exp(alpha)) minus
# predicted RT at c_trial = -1 (exp(alpha - beta)), per posterior sample.
beta_ms <- exp(alpha_samples) - exp(alpha_samples - beta_samples)
  • so we’re modelling log(rt) in terms of \(\alpha\) (intercept) and \(\beta\) (slope) as predicted by the centred trial:

\[ \begin{align} log(rt) &= \alpha + \beta \cdot c\_trial\\ exp(log(rt)) &= exp(\alpha + \beta \cdot c\_trial)\\ rt &= exp(\alpha + \beta \cdot c\_trial) \end{align} \]

  • and we can now compute some simple statistics
# Posterior mean and 95% credible interval of the slowdown in ms.
beta_ms_stats <- round(
  c(mean(beta_ms), quantile(beta_ms, probs = c(0.025, 0.975))),
  3
)
beta_msmean <- beta_ms_stats[[1]]
beta_mslow <- beta_ms_stats[2]
beta_mshigh <- beta_ms_stats[3]
# print
c(beta_msmean, beta_mslow, beta_mshigh)
##        2.5% 97.5% 
## 0.087 0.067 0.108
  • we see there is a slow down of 0.087 milliseconds going from the -1 trial to the 0 trial
    • what about differences between e.g., the first and second trials?
# Centred value of the very first trial (the most negative c_trial),
# and of the trial immediately after it.
first_trial <- min(df_spacebar$c_trial)
second_trial <- first_trial + 1
  • now use the same calculations as before to find the effect in ms, but now we must multiply beta by the trial number, because it equals the effect at the centre of the c_trials
# Slowdown in ms between the first and second trials: evaluate the
# regression on the log scale at both trials, back-transform, subtract.
effect_beginning_ms <-
  exp(alpha_samples + second_trial * beta_samples) -
  exp(alpha_samples + first_trial * beta_samples)

# Posterior mean and 95% CrI.
round(
  c(mean = mean(effect_beginning_ms),
    quantile(effect_beginning_ms, c(0.025, 0.975))),
  3
)
##  mean  2.5% 97.5% 
## 0.080 0.062 0.096
  • but what is the slowdown after 100 trials from the middle of the experiment?
# Slowdown after 100 trials from the middle of the experiment:
# evaluate at c_trial = 100 and at c_trial = 0, then subtract (in ms).
effect_100 <-
  exp(alpha_samples + 100 * beta_samples) -
  exp(alpha_samples)

# Posterior mean and 95% CrI.
round(
  c(mean = mean(effect_100),
    quantile(effect_100, c(0.025, 0.975))),
  2
)
##  mean  2.5% 97.5% 
##  8.99  6.82 11.13
  • now let’s plot the posterior predictive distribution of *median differences* between the nth and (n−100)th trial:
# Median difference between trial n and trial n-100.
# diff(x, lag = 100) equals the non-NA part of x - dplyr::lag(x, 100),
# so base diff() gives the same median without the dplyr dependency.
median_diff100 <- function(x) median(diff(x, lag = 100), na.rm = TRUE)

# Posterior predictive distribution of median_diff100, shown both as a
# histogram of the statistic and as a density overlay.
hist_panel <- pp_check(fit_press_trial,
  type = "stat",
  stat = "median_diff100"
) +
  labs(title = "Histogram") +
  theme(legend.position = "bottom")

dens_panel <- pp_check(fit_press_trial,
  ndraws = 100,
  type = "dens_overlay",
  stat = "median_diff100"
) +
  labs(title = "Density plot") +
  theme(legend.position = "bottom")

ggpubr::ggarrange(hist_panel, dens_panel, nrow = 1, ncol = 2)

  • the histogram posterior predictive distribution is very useful for informing the variability in future studies

4.5 Logistic regression

  • a special case of generalised linear model: logistic regression
  • example experiment:
    • participant sees a different number of words, and are prompted to give some word they were presented (e.g., the 2nd word)
    • the set size = the number of words presented in sequence
    • question: what is the effect of set size on recall accuracy?
  • this will have a Bernoulli distribution, because this data is binomial (correct or incorrect)
# Load the recall data set and preview it. Note: data() returns the
# data set *name* invisibly, so piping data() into head() only prints
# the string "df_recall" rather than the first rows of the data.
data("df_recall")
head(df_recall)
# Centre set_size so the intercept corresponds to the grand mean.
df_recall <- mutate(df_recall, c_set_size = set_size - mean(set_size))
  • we centred the set_size so that the intercept represents the grand mean
    • so now we will have the centred set_size at 0, with set_sizes below having a negative value, and above will have a positive value
# print number of observations per set_size
# (confirms the design: multiple trials per participant per set size)
df_recall %>%
  group_by(set_size) %>%
  count()
  • the Bernoulli distribution has only one trial, and there’s some probability of success (\(\theta\))

\[ correct_n \sim Bernoulli(\theta_n) \]

  • where \(_n\) represents a row in the data frame (i.e., observation)

  • and \(\theta\) is the probability of success

  • we’re gonna take the odds of the \(\theta_n\)’s and take the log of them

\[ \eta_n = g(\theta_n) = log(\frac{\theta_n}{1 - \theta_n}) \]

  • we need to convert the Bernoulli values of 0 and 1 to some continuous outcome in order to run a linear regression on these values

  • in the figure below, note that the logit link takes as its input \(\eta\) and spits out the probability (\(\theta\)), whereas the inverse takes as its input \(\theta\) and spits out \(\eta\)

# Visualize the logit link and its inverse.
x <- seq(0.001, 0.999, by = 0.001) # probabilities just inside (0, 1)
y <- log(x / (1 - x))              # logit: log-odds of each probability
logistic_dat <- data.frame(theta = x, eta = y)

# qplot() is deprecated (ggplot2 >= 3.4.0); use ggplot() + geom_line().
p1 <- ggplot(logistic_dat, aes(x = theta, y = eta)) +
  geom_line() +
  xlab(expression(theta)) +
  ylab(expression(eta)) +
  ggtitle("The logit link") +
  annotate("text",
    x = 0.3, y = 4,
    label = expression(paste(eta, "=", g(theta))), parse = TRUE, size = 8
  ) +
  theme_bw()

p2 <- ggplot(logistic_dat, aes(x = eta, y = theta)) +
  geom_line() +
  xlab(expression(eta)) +
  ylab(expression(theta)) +
  ggtitle("The inverse logit link (logistic)") +
  annotate("text",
    x = -3.5, y = 0.80,
    label = expression(paste(theta, "=", g^-1, (eta))), parse = TRUE, size = 8
  ) +
  theme_bw()

ggpubr::ggarrange(p1, p2, ncol = 2, labels = c("A", "B"))

4.5.1 Priors for logistic regression

  • we don’t have error terms in our model at the moment, we can worry about that later

  • once we have figured out the \(\alpha\) and \(\beta\), we can compute the probabilities for any value of \(\eta_n\)

    • the model is fit on the log-odds scale, but we can always convert them back to the probability scale
  • qlogis(p): gives logit

  • plogis(x): gives inverse logit

  • if we try priors such as \(\alpha \sim Normal(0,4)\), we’d see higher probabilities near probabilities of 0 and 1

# Visualize the Normal(0, 4) prior for alpha on the log-odds scale and,
# via the inverse logit, on the probability scale. Transform the *same*
# draws rather than drawing a second independent sample, so both panels
# show one and the same prior.
samples_logodds <- tibble(alpha = rnorm(100000, 0, 4))
samples_prob <- tibble(p = plogis(samples_logodds$alpha))

ggpubr::ggarrange(
  ggplot(samples_logodds, aes(alpha)) +
    geom_density() +
    labs(title = "Prior in log-odds (0,4)") +
    theme_bw(),
  ggplot(samples_prob, aes(p)) +
    geom_density() +
    labs(title = "Prior in probability (0,4)") +
    theme_bw(),
  nrow = 1,  ncol = 2,
  labels = c("A","B")
)

  • what about a more constrained prior: \(\alpha \sim Normal(0,1.5)\)
    • looks better
# Same visualization for the tighter Normal(0, 1.5) prior; again
# transform the same draws instead of drawing a second sample.
samples_logodds <- tibble(alpha = rnorm(100000, 0, 1.5))
samples_prob <- tibble(p = plogis(samples_logodds$alpha))

ggpubr::ggarrange(
  ggplot(samples_logodds, aes(alpha)) +
    geom_density() +
    labs(title = "Prior in log-odds (0,1.5)") +
    theme_bw(),
  ggplot(samples_prob, aes(p)) +
    geom_density() +
    labs(title = "Prior in probability (0,1.5)") +
    theme_bw(),
  nrow = 1,  ncol = 2,
  labels = c("A","B")
)

4.5.2 Running the model: logistic regression

  • remember, we aren’t committed to just one set of priors
    • we can try a variety of priors (sensitivity analysis)
# Logistic regression: Bernoulli likelihood with a logit link.
priors_recall <- c(
  prior(normal(0, 1.5), class = Intercept),
  prior(normal(0, .1), class = b, coef = c_set_size)
)
fit_recall <- brm(correct ~ 1 + c_set_size,
                  data = df_recall,
                  family = bernoulli(link = logit),
                  prior = priors_recall)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
  • we now have the Bernoulli distribution with a logit-link function
# graphical posterior summary: marginal densities and trace plots per parameter
plot(fit_recall)

  • b_Intercept is giving the distribution of grand averages

  • b_c_set_size is giving the change in accuracy (log odds) with an increase of one level of set size

  • and check out the estimates:

# posterior means, SDs, and 95% credible intervals (log-odds scale)
posterior_summary(fit_recall,
                  variable = c("b_Intercept", "b_c_set_size"))
##                Estimate  Est.Error       Q2.5      Q97.5
## b_Intercept   1.9046294 0.29186157  1.3528888  2.5241813
## b_c_set_size -0.1814657 0.08063706 -0.3431062 -0.0259217
  • but let’s change the unit of measurement back to probabilities
# posterior draws of the intercept (log-odds scale)
alpha_samples <- as_draws_df(fit_recall)$b_Intercept
# inverse-logit transform: log odds -> probability of correct recall
av_accuracy <- plogis(alpha_samples)
# posterior mean and 95% credible interval, rounded to 3 decimals
round(c(mean = mean(av_accuracy),
        quantile(av_accuracy, c(0.025, 0.975))),
      digits = 3)
##  mean  2.5% 97.5% 
## 0.867 0.795 0.926
# posterior draws of the set-size slope (log-odds scale)
beta_samples <- as_draws_df(fit_recall)$b_c_set_size
# point estimate and 95% credible interval bounds, rounded for in-text reporting
beta_mean <- round(mean(beta_samples), 5)
# quantile()'s argument is `probs`; `prob` only worked via partial matching
beta_low <- round(quantile(beta_samples, probs = .025), 5)
beta_high <- round(quantile(beta_samples, probs = .975), 5)
  • we see accuracy is on average 87%, with a 95% CrI in the range of 79-93%

  • what is the change in accuracy for a change in set size by 1 unit?

beta_samples <- as_draws_df(fit_recall)$b_c_set_size # slopes for centred set_size

# effect at CENTRED set_size = 0
# i.e. the drop in accuracy (probability scale) for a one-unit increase in
# set size around the mean: plogis at c_set_size = 0 minus plogis at -1
effect_middle <- plogis(alpha_samples) -
  plogis(alpha_samples - beta_samples)

# summary stats
round(
  c(mean = mean(effect_middle),
  quantile(effect_middle, c(0.025, 0.975))),
  3)
##   mean   2.5%  97.5% 
## -0.019 -0.037 -0.003
  • but what about for a change from set size 2 to 4?
# change in accuracy (probability scale) from set size 2 to set size 4
four <- 4 - mean(df_recall$set_size) # 'centred' value for set size 4
two <- 2 - mean(df_recall$set_size) # 'centred' value for set size 2

# extract differences from the samples
effect4m2 <-
  plogis(alpha_samples + four * beta_samples) -
  plogis(alpha_samples + two * beta_samples)

# compute stats: posterior mean and 95% credible interval
round(
  c(mean = mean(effect4m2),
    quantile(effect4m2,
             c(.025,.975))),
  3)
##   mean   2.5%  97.5% 
## -0.030 -0.053 -0.006

4.6 Hierarchical regression

  • hierarchical/mixed/multilevel models
  • some sample EEG data (from Nieuwland et al 2018, collected in Edinburgh)
    • we first centre cloze probability, our predictor
    • prediction: negative slope, as cloze probability decreases N400 increases
    • but now we have repeated measurements for each participant subj
      • this violates the independence assumption of linear models; we solve this by including participant as a random effect
# load the EEG data set (from the bcogsci package) and centre the predictor
data("df_eeg")
(df_eeg <- df_eeg %>%
    mutate(c_cloze = cloze - mean(cloze)))
# histogram of N400 amplitudes with a normal density overlay
# (sample mean/sd) to eyeball the normality assumption
df_eeg %>% ggplot(aes(n400)) +
  geom_histogram(
    binwidth = 4,
    colour = "gray",
    alpha = .5,
    aes(y = after_stat(density))
  ) +
  stat_function(fun = dnorm, args = list(
    mean = mean(df_eeg$n400),
    sd = sd(df_eeg$n400)
  )) +
  xlab("Average voltage in microvolts for 
       the N400 spatiotemporal window") + theme_bw()

  • we now have the following model

\[ signal_n \sim \mathit{Normal}(\alpha + u_{subj[n],1} + c\_cloze_n \cdot (\beta+ u_{subj[n],2}),\sigma) \]

  • where:
    • \(n\) indexes the row-id in the data
    • \(subj[n]\): subject id in the \(n\)-th row
    • \(u_{subj[n],1}\): intercept adjustment for a given participant_n
    • \(u_{subj[n],2}\): slope \(\beta\) adjustment for a given participant_n
  • we assume the by-subject variance is normally distributed and centred around 0
    • basically, we’re assuming some between-subject variability
  • we now have three variance components:
    • between-subject variability in intercepts: \(\tau_{u1} \sim Normal_+(0,20)\)
    • between-subject variability in slopes: \(\tau_{u2} \sim Normal_+(0,20)\)
    • within subject variability: \(\sigma \sim Normal_+(0,50)\)
  • we can now create a vector with our priors
# priors for the hierarchical model, matching the specification above
prior_v <-
  c(
    prior(normal(0,10), class = Intercept), # grand-mean intercept
    prior(normal(0,10), class = b, coef = c_cloze), # predictor slope
    prior(normal(0,50), class = sigma), # within-subject residual sd
    prior(normal(0,20), class = sd, coef = Intercept, group = subj), # by-participant intercept sd (tau_u1)
    prior(normal(0,20), class = sd, coef = c_cloze, group = subj) # by-participant slope sd (tau_u2)
  )
# `||` gives uncorrelated by-subject intercepts and slopes
# (no correlation parameter is estimated)
fit_N400_v <- brm(n400 ~ c_cloze + 
                    (c_cloze || subj),
                  prior = prior_v,
                  data = df_eeg)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
  • we aren’t including a correlation in the random effects (REs)
    • which would express whether, as the intercept adjustment e.g., increases, so does the slope adjustment/size of the effect for a given participant (a positive correlation)
# print the fitted model summary (population-level, group-level, and sigma)
fit_N400_v
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: n400 ~ c_cloze + (c_cloze || subj) 
##    Data: df_eeg (Number of observations: 2863) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~subj (Number of levels: 37) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     2.18      0.38     1.52     2.99 1.00     1607     2325
## sd(c_cloze)       1.73      0.90     0.11     3.49 1.00     1047     1237
## 
## Population-Level Effects: 
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     3.64      0.44     2.76     4.49 1.00     1571     2134
## c_cloze       2.32      0.61     1.16     3.52 1.01     4009     2932
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma    11.61      0.16    11.31    11.93 1.00     5926     2977
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
  • we see a mean effect of cloze probability of 2.32 95% CrI [1.165, 3.523]. The positive slope indicates that the larger the cloze probability, the larger the N400…counterintuitive? Should be the other way around I’d expect (negative correlation?).

  • in the posterior distributions, we see:

    • the grand mean b_Intercept
    • the grand mean slope (effect) b_c_cloze
    • sd of the subject intercept adjustment sd_subj_Intercept (\(\tau_{u1}\))
    • sd of by-subject slope adjustment sd_subj_c_cloze (\(\tau_{u2}\))
      • is quite large, maybe even larger than the subject intercept adjustment (so maybe there’s important variation in effect size between participants?)
    • within subject variance sigma
# posterior density plots for the first five parameters listed by variables()
mcmc_dens(fit_N400_v, pars = variables(fit_N400_v)[1:5])